"""vmgirl，单相册图片爬取"""
import os
import re
import time

import requests

# Disabled single-album variant: downloads every image linked from one post.
# # Page URL
# url = "https://www.vmgirls.net/18629.html"
# # Download directory
# base_dir = 'E:\\T\\Picture\\vmgirls'
# res = requests.get(url)
# # Print the request headers, which include the user-agent that was sent
# # print(res.request.headers)
# html = res.text
# # print(html)
# title = re.findall('<h1 class="post-title mb-3">(.*?)</h1>', html)[0]
# # print(title)
# if not os.path.exists(base_dir + os.sep + title):
#     os.mkdir(base_dir + os.sep + title)
# list1 = re.findall('<a href="(.*?)" alt=".*?" title=".*?">', html)
# # print(list1)
# for i in list1:
#     name = i.split('/')[-1]
#     time.sleep(0.5)
#     resp = requests.get(i)
#     with open(base_dir + os.sep + title + os.sep + name, "wb") as f:
#         f.write(resp.content)

# Listing page whose album links are crawled.
url = "https://www.vmgirls.net/special/bilitis"
# Download root; one sub-directory per album is created underneath it.
base_dir = 'E:\\T\\Picture\\vmgirls\\'

# Seconds to wait for the server before a request counts as failed. Without a
# timeout a stalled connection would block the script indefinitely.
_TIMEOUT = 10
# Attempts per page before giving up, so a permanent failure (bad URL, site
# down) cannot keep the script spinning forever.
_MAX_ATTEMPTS = 5
# Seconds to pause between failed attempts instead of retrying immediately.
_RETRY_DELAY = 2
# Seconds to pause before each image download, to go easy on the server.
_DOWNLOAD_DELAY = 0.5
# Characters that cannot appear in a Windows file or directory name.
_UNSAFE_CHARS = re.compile(r'[\\/:*?"<>|]')
# Matches the image links inside an album page; group 1 is the image URL.
_IMAGE_LINK_RE = re.compile('<a href="(.*?)" alt=".*?" title=".*?">')


def _safe_name(text):
    """Return *text* with characters that are invalid in file names replaced by '_'."""
    return _UNSAFE_CHARS.sub('_', text)


def _fetch_html(page_url, announce=False):
    """Return the body text of *page_url*, or None when every attempt fails.

    Makes up to ``_MAX_ATTEMPTS`` requests, pausing ``_RETRY_DELAY`` seconds
    after each failure. When *announce* is true a progress line is printed
    before every attempt.
    """
    for attempt in range(1, _MAX_ATTEMPTS + 1):
        if announce:
            print("建立连接")
        try:
            return requests.get(page_url, timeout=_TIMEOUT).text
        except requests.RequestException as e:
            # Show the cause as well; the bare message alone hides why it failed.
            print('连接异常', e)
            if attempt < _MAX_ATTEMPTS:
                time.sleep(_RETRY_DELAY)
    return None


html = _fetch_html(url, announce=True)
if html is None:
    # Nothing can be crawled without the listing page.
    raise SystemExit("无法获取相册列表页")

# Group 1 is the album URL, group 2 the album title.
re1 = re.compile('<a href="(.*?)" title="(.*?)" class=".*?" >')
albums = re1.findall(html)

# Only the last three matches are processed, starting from the final one.
for href, title in albums[:-4:-1]:
    album_dir = base_dir + _safe_name(title)
    if not os.path.exists(album_dir):
        print(f"创建相册{title}")
        # makedirs also creates base_dir itself on a first run.
        os.makedirs(album_dir, exist_ok=True)

    html1 = _fetch_html(href)
    if html1 is None:
        print(f"相册{title}获取失败，已跳过")
        continue

    list1 = _IMAGE_LINK_RE.findall(html1)
    print(f"相册{title}图片爬取中")
    for i in list1:
        name = _safe_name(i.split('/')[-1])
        if not name:
            # A URL ending in '/' yields no file name to save under.
            continue
        path = os.path.join(album_dir, name)
        # Check the disk before touching the network so reruns do not
        # re-download images that are already saved.
        if os.path.exists(path):
            continue
        time.sleep(_DOWNLOAD_DELAY)
        try:
            resp = requests.get(i, timeout=_TIMEOUT)
            # Do not save an HTTP error page under an image file name.
            resp.raise_for_status()
        except requests.RequestException as e:
            # One bad image must not abort the rest of the crawl.
            print(f"相片{name}下载失败，已跳过", e)
            continue
        with open(path, "wb") as f:
            print(f"写入相片{name}")
            f.write(resp.content)
